Datasets Preparation
Currency Exchange Rates
CurrExchRatesClean <- currExchRates %>% select(which(colMeans(is.na(.)) < 0.2))
CurrExchRatesClean <- CurrExchRatesClean %>% mutate(Euro = ifelse(Date < as.Date("1998-10-30", format="%Y-%m-%d"), 0, Euro))
CurrExchRatesClean <- CurrExchRatesClean %>% mutate(Date = as.Date(Date, format= "%Y-%m-%d"))
CurrExchRatesClean <- na_ma(CurrExchRatesClean, k = 1)
st(CurrExchRatesClean)
Summary Statistics
|
Variable
|
N
|
Mean
|
Std. Dev.
|
Min
|
Pctl. 25
|
Pctl. 75
|
Max
|
|
Australian.Dollar
|
5978
|
0.768
|
0.143
|
0.483
|
0.665
|
0.869
|
1.105
|
|
Bahrain.Dinar
|
5978
|
0.376
|
0
|
0.376
|
0.376
|
0.376
|
0.376
|
|
Brazilian.Real
|
5978
|
2.158
|
0.765
|
0.832
|
1.72
|
2.717
|
4.195
|
|
Canadian.Dollar
|
5978
|
1.266
|
0.187
|
0.917
|
1.086
|
1.403
|
1.613
|
|
Colombian.Peso
|
5978
|
2082.838
|
589.734
|
833.18
|
1795.37
|
2450.322
|
3434.89
|
|
Danish.Krone
|
5978
|
6.282
|
0.912
|
4.665
|
5.612
|
6.804
|
9.006
|
|
Euro
|
5978
|
1.007
|
0.476
|
0
|
0.938
|
1.314
|
1.599
|
|
Icelandic.Krona
|
5978
|
92.418
|
24.739
|
54.72
|
70.42
|
116.888
|
147.98
|
|
Indian.Rupee
|
5978
|
48.181
|
9.451
|
31.37
|
43.133
|
52.967
|
68.778
|
|
Japanese.Yen
|
5978
|
107.974
|
14.065
|
75.86
|
100.765
|
118.395
|
147
|
|
Korean.Won
|
5978
|
1103.062
|
160.752
|
756
|
1016.675
|
1186.8
|
1964.8
|
|
Kuwaiti.Dinar
|
5978
|
0.294
|
0.011
|
0.265
|
0.287
|
0.303
|
0.309
|
|
Libyan.Dinar
|
5978
|
1.507
|
0.61
|
0.525
|
0.662
|
1.932
|
1.932
|
|
Malaysian.Ringgit
|
5978
|
3.504
|
0.491
|
2.436
|
3.182
|
3.8
|
4.725
|
|
Nepalese.Rupee
|
5978
|
77.184
|
15.094
|
49.88
|
68.475
|
84.698
|
109.98
|
|
New.Zealand.Dollar
|
5978
|
0.66
|
0.118
|
0.393
|
0.579
|
0.736
|
0.882
|
|
Norwegian.Krone
|
5978
|
6.964
|
1.087
|
4.959
|
6.107
|
7.803
|
9.606
|
|
Pakistani.Rupee
|
5978
|
70.139
|
23.441
|
30.877
|
51.866
|
90.937
|
115.697
|
|
Qatar.Riyal
|
5978
|
3.64
|
0
|
3.64
|
3.64
|
3.64
|
3.64
|
|
Rial.Omani
|
5978
|
0.384
|
0
|
0.384
|
0.384
|
0.384
|
0.385
|
|
Saudi.Arabian.Riyal
|
5978
|
3.749
|
0.002
|
3.745
|
3.745
|
3.75
|
3.75
|
|
Singapore.Dollar
|
5978
|
1.503
|
0.183
|
1.201
|
1.361
|
1.688
|
1.851
|
|
South.African.Rand
|
5978
|
8.076
|
2.849
|
3.53
|
6.218
|
9.941
|
16.771
|
|
Sri.Lanka.Rupee
|
5978
|
102.232
|
28.663
|
49.57
|
77.54
|
126.29
|
157.646
|
|
Swedish.Krona
|
5978
|
7.731
|
1.084
|
5.843
|
6.832
|
8.376
|
10.995
|
|
Swiss.Franc
|
5978
|
1.211
|
0.249
|
0.725
|
0.979
|
1.389
|
1.823
|
|
Thai.Baht
|
5978
|
35.229
|
5.531
|
24.44
|
31.568
|
39.447
|
56.061
|
|
Trinidad.And.Tobago.Dollar
|
5978
|
6.309
|
0.18
|
5.839
|
6.26
|
6.376
|
6.789
|
|
U.A.E..Dirham
|
5978
|
3.672
|
0.001
|
3.671
|
3.672
|
3.672
|
3.675
|
|
U.K..Pound.Sterling
|
5978
|
1.615
|
0.174
|
1.213
|
1.518
|
1.675
|
2.102
|
|
U.S..Dollar
|
5978
|
1
|
0
|
1
|
1
|
1
|
1
|
Gold Prices
goldPricesClean <- goldPrices %>% mutate(
EURO..AM. = ifelse(Date < as.Date("1998-12-31", format="%Y-%m-%d"), 0, EURO..AM.),
EURO..PM. = ifelse(Date < as.Date("1998-12-31", format="%Y-%m-%d"), 0, EURO..PM.),
USD..PM. = ifelse(Date < as.Date("1968-03-14", format="%Y-%m-%d"), 0, USD..PM.),
GBP..PM. = ifelse(Date < as.Date("1968-03-14", format="%Y-%m-%d"), 0, GBP..PM.))
goldPricesClean <- goldPricesClean %>% mutate(Date = as.Date(Date, format= "%Y-%m-%d"))
goldPricesClean <- na_ma(goldPricesClean, k = 1)
st(goldPricesClean)
Summary Statistics
|
Variable
|
N
|
Mean
|
Std. Dev.
|
Min
|
Pctl. 25
|
Pctl. 75
|
Max
|
|
USD..AM.
|
13585
|
575.166
|
490.165
|
34.77
|
280.5
|
841.75
|
2061.5
|
|
USD..PM.
|
13585
|
574.853
|
490.136
|
0
|
280.3
|
841.1
|
2067.15
|
|
GBP..AM.
|
13585
|
370.554
|
353.367
|
14.485
|
177.63
|
453.906
|
1574.37
|
|
GBP..PM.
|
13585
|
370.381
|
353.32
|
0
|
177.615
|
453.787
|
1569.59
|
|
EURO..AM.
|
13585
|
337.372
|
477.165
|
0
|
0
|
576.189
|
1743.82
|
|
EURO..PM.
|
13585
|
337.309
|
477.073
|
0
|
0
|
576.776
|
1743.43
|
World Development Index and Top Chosen Countries
#https://worldpopulationreview.com/country-rankings/developed-countries
#http://hdr.undp.org/en/content/latest-human-development-index-ranking
hdiTop <- c('Norway', 'Ireland', 'Switzerland', 'Iceland', 'Hong Kong', 'Germany', 'Sweden', 'Netherlands', 'Australia', 'Denmark', 'Finland', 'Singapore', 'United Kingdom', 'New Zealand', 'Belgium', 'Canada', 'United States', 'Austria', 'Israel', 'Japan', 'Slovenia', 'Luxembourg', 'Korea, Rep.', 'Spain', 'France')
worldDevInd$Series.Name<-gsub("\\$","D",worldDevInd$Series.Name)
worldDevIndClean <- worldDevInd %>% select(-Series.Code) %>%
pivot_longer('X1970..YR1970.':'X2020..YR2020.', names_to = "Year", values_to = "Value", values_drop_na = FALSE) %>%
pivot_wider(names_from = "Series.Name", values_from = Value) %>% unchop(everything())
worldDevIndClean <- worldDevIndClean %>%
mutate(Year = substr(Year, 2, 5)) %>%
filter(grepl('', Country.Code)) %>%
relocate(Year, .after = Country.Code)
trial <- as.data.frame(worldDevIndClean)
trial <- worldDevIndClean %>% filter(Country.Name %in% hdiTop)
#trial <- trial %>% filter(Year==2020) %>% arrange(`GDP (current USD)`)
trial <- trial %>% select(which(colMeans(is.na(.)) < 0.10))
trial[is.na(trial)] <- 0
wdiTop <- trial
datatable(skim(wdiTop %>% dplyr::select(-Country.Name, -Country.Code, -Year)) %>% dplyr::select(skim_variable, numeric.mean, numeric.sd, numeric.p0, numeric.p25, numeric.p75, numeric.p100) %>% dplyr::rename(Variable=skim_variable, Mean=numeric.mean, "Std. Dev."=numeric.sd, Min=numeric.p0, "Pctl. 25"=numeric.p25, "Pctl. 75"=numeric.p75, Max=numeric.p100))
S.P. Composite
spCompositeClean <- spComposite %>%
mutate(Cyclically.Adjusted.PE.Ratio = ifelse(Year < as.Date("1880-12-31", format="%Y-%m-%d"), 0, Cyclically.Adjusted.PE.Ratio)) %>% rename(Date = Year)
spCompositeClean <- spCompositeClean%>% mutate(Date = as.Date(Date, format= "%Y-%m-%d"))
spCompositeClean <- na_ma(spCompositeClean, k = 1)
st(spComposite)
Summary Statistics
|
Variable
|
N
|
Mean
|
Std. Dev.
|
Min
|
Pctl. 25
|
Pctl. 75
|
Max
|
|
S.P.Composite
|
1810
|
327.968
|
685.029
|
2.73
|
7.903
|
164.4
|
4493.28
|
|
Dividend
|
1806
|
6.732
|
12.069
|
0.18
|
0.42
|
7.053
|
59.68
|
|
Earnings
|
1806
|
15.371
|
28.933
|
0.16
|
0.561
|
14.726
|
158.74
|
|
CPI
|
1810
|
62.392
|
76.26
|
6.28
|
10.2
|
102.275
|
273.983
|
|
Long.Interest.Rate
|
1810
|
4.504
|
2.306
|
0.62
|
3.171
|
5.139
|
15.32
|
|
Real.Price
|
1810
|
621.987
|
733.091
|
73.899
|
186.575
|
706.991
|
4477.204
|
|
Real.Dividend
|
1806
|
17.498
|
11.114
|
5.445
|
9.417
|
22.301
|
63.511
|
|
Real.Earnings
|
1806
|
34.907
|
29.841
|
4.576
|
14.063
|
43.768
|
159.504
|
|
Cyclically.Adjusted.PE.Ratio
|
1690
|
17.215
|
7.048
|
4.784
|
11.898
|
20.913
|
44.198
|
Bitcoins’ Data
btcTrvou <- btcTrvou %>% rename(Trvou = Value)
btcMkpru <- btcMkpru %>% rename(Mkpru = Value)
btcDiff <- btcDiff %>% rename(Diff = Value)
btcHrate <- btcHrate %>% rename(Hrate = Value)
btcAllClean <- btcTrvou %>% left_join(btcMkpru) %>% left_join(btcHrate) %>% left_join(btcDiff)
btcAllClean <- btcAllClean %>% mutate(Date = as.Date(Date, format= "%Y-%m-%d"))
btcAllClean <- na_ma(btcAllClean, k = 1)
st(btcAllClean)
Summary Statistics
|
Variable
|
N
|
Mean
|
Std. Dev.
|
Min
|
Pctl. 25
|
Pctl. 75
|
Max
|
|
Trvou
|
4661
|
146728050.012
|
330500192.236
|
0
|
194787.108
|
148443297.885
|
5352015515.54
|
|
Mkpru
|
4661
|
5141.227
|
10990.321
|
0
|
7.2
|
6499.062
|
63554.44
|
|
Hrate
|
4661
|
26480643.671
|
46431207.364
|
0
|
12.016
|
38267606.403
|
198514005.71
|
|
Diff
|
4661
|
3667535953756.88
|
6452342784118.23
|
0
|
1689334.405
|
5363678461480
|
25046487590083
|
Attributes analysis
Gold Prices
GoldAnalysis <- goldPricesClean %>% pivot_longer(cols = 'USD..AM.':'EURO..PM.', names_to = 'Curr', values_to = 'Value')
ggplot(data = GoldAnalysis, aes(x = Date, y = Value, color=as.factor(Curr))) +
geom_point(size=0.1) + facet_wrap(~Curr) + theme_minimal() + labs(color='Currency', title='Gold Prices\' Overview')

S.P. Composite

ggplot(data = spCompositeClean, aes(x = Date, y = S.P.Composite)) +
geom_line(color='blue') + theme_minimal() + labs(title='S.P.Composite through Years') + transition_reveal(Date)

Correlations
Correlation between Hrate and Mkpru
btcPlusSP <- btcAllClean %>% inner_join(spCompositeClean, on='Date')
## Joining, by = "Date"
btcPlusGold <- btcAllClean %>% inner_join(goldPricesClean, on='Date')
## Joining, by = "Date"
ggscatter(btcAllClean, x = "Mkpru", y = "Hrate",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
xlab = "Mkpru", ylab = "Hrate", color = 'plum4')
## `geom_smooth()` using formula 'y ~ x'

Correlations for Bitcoins’ Data Including S.P. Composite
ggscatter(btcPlusSP, x = "Mkpru", y = "S.P.Composite",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
xlab = "Mkpru", ylab = "S.P.Composite", color='maroon2')
## `geom_smooth()` using formula 'y ~ x'

ggscatter(btcPlusSP, x = "Mkpru", y = "CPI",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
xlab = "Mkpru", ylab = "CPI", color='indianred3')
## `geom_smooth()` using formula 'y ~ x'

ggscatter(btcPlusSP, x = "Mkpru", y = "Earnings",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
xlab = "Mkpru", ylab = "Earnings", color='coral2')
## `geom_smooth()` using formula 'y ~ x'

Correlations for Bitcoins’ Data Including Arbitrally Chosen Indexes from WDI
btcYearsSummary <- btcAllClean %>% mutate(Year = format(as.Date(Date, format="%d/%m/%Y"),"%Y")) %>% select(-Date) %>% group_by(Year) %>% dplyr::summarize(MeanDiff = mean(Diff), MeanHrate=mean(Hrate), MeanMkpru=mean(Mkpru), MeanTrvou=mean(Trvou)) %>% filter(Year > 2008 & Year <2021)
btcPlusCountry <- btcYearsSummary %>% inner_join(wdiTop, on='Year')
## Joining, by = "Year"
for (country in hdiTop) {
countryExample <- btcPlusCountry %>% filter(Country.Name == country)
check <- cor(x = countryExample$MeanMkpru, y = countryExample$`GDP per capita (current USD)`, method = "pearson", use = "everything")
if (check < 0.8 | is.na(check)) {next}
print(ggscatter(countryExample, x = "MeanMkpru", y = "GDP per capita (current USD)",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
xlab = "MeanMkpru", ylab = "GDP per capita (current USD)", color='orchid2', title = country))
}
## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

for (country in hdiTop) {
countryExample <- btcPlusCountry %>% filter(Country.Name == country)
check <- cor(x = countryExample$MeanMkpru, y = countryExample$`Urban population growth (annual %)`, method = "pearson", use = "everything")
if (check < 0.7 | is.na(check)) {next}
print(ggscatter(countryExample, x = "MeanMkpru", y = "Urban population growth (annual %)",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
xlab = "MeanMkpru", ylab = "Urban population growth (annual %)", color='orchid3', title = country))
}
## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

Model
set.seed(23)
btcAllClean2 <- btcAllClean %>% select(-Date)
inTraining <- createDataPartition(y = btcAllClean2$Mkpru, p = .70, list = FALSE)
train <- btcAllClean2[inTraining,]
valid_and_test <- btcAllClean2[-inTraining,]
inTesting <- createDataPartition(y = valid_and_test$Mkpru, p = .50, list = FALSE)
valid <- valid_and_test[ -inTesting,]
test <- valid_and_test[ inTesting,]
ctrl <- trainControl(
# powtórzona ocena krzyżowa
method = "repeatedcv",
# liczba podziałów
number = 2,
# liczba powtórzeń
repeats = 5)
fit <- train(Mkpru ~ .,
data = train,
method = "rf",
trControl = ctrl,
# Paramter dla algorytmu uczącego
ntree = 10)
## note: only 2 unique complexity parameters in default grid. Truncating the grid to 2 .
fit
## Random Forest
##
## 3265 samples
## 3 predictor
##
## No pre-processing
## Resampling: Cross-Validated (2 fold, repeated 5 times)
## Summary of sample sizes: 1633, 1632, 1633, 1632, 1632, 1633, ...
## Resampling results across tuning parameters:
##
## mtry RMSE Rsquared MAE
## 2 2888.579 0.9342471 853.6425
## 3 2783.717 0.9393712 762.8295
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 3.
pred <- predict(fit, newdata = test)
preddf <- data.frame(Pred = pred, Mkpru = test$Mkpru)
View(preddf)